{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# AdventureWorks: resit questions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading spark-stubs, spark-hive\n",
      "Adding Hive conf dir /opt/hive/conf to classpath\n",
      "Creating SparkSession\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "SLF4J: No SLF4J providers were found.\n",
      "SLF4J: Defaulting to no-operation (NOP) logger implementation\n",
      "SLF4J: See https://www.slf4j.org/codes.html#noProviders for further details.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<a target=\"_blank\" href=\"http://jupyter:4040\">Spark UI</a>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$                                  \n",
       "\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.log4j.{Level, Logger}\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark._\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql._\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.types._\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.functions._\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.expressions.Window\n",
       "\n",
       "\u001b[39m\n",
       "\u001b[36mspark\u001b[39m: \u001b[32mSparkSession\u001b[39m = org.apache.spark.sql.SparkSession@6fbe4f0e"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import $ivy.`org.apache.spark::spark-sql:3.4.0`\n",
    "\n",
    "import org.apache.log4j.{Level, Logger}\n",
    "Logger.getLogger(\"org\").setLevel(Level.OFF)\n",
    "\n",
    "import org.apache.spark._\n",
    "import org.apache.spark.sql._\n",
    "import org.apache.spark.sql.types._\n",
    "import org.apache.spark.sql.functions._\n",
    "import org.apache.spark.sql.expressions.Window\n",
    "\n",
    "val spark = {\n",
    "    NotebookSparkSession.builder()\n",
    "    .progress(false)\n",
    "    .appName(\"app14-4\")\n",
    "    // .master(\"spark://192.168.31.31:7077\")\n",
    "    .master(\"local[*]\")\n",
    "    .config(\"spark.sql.warehouse.dir\", \n",
    "            \"hdfs://192.168.31.31:9000/user/hive/warehouse\") \n",
    "    .config(\"spark.cores.max\", \"4\") \n",
    "    .config(\"spark.executor.instances\", \"1\") \n",
    "    .config(\"spark.executor.cores\", \"2\") \n",
    "    .config(\"spark.executor.memory\", \"10g\") \n",
    "    .config(\"spark.shuffle.service.enabled\", \"false\") \n",
    "    .config(\"spark.dynamicAllocation.enabled\", \"false\") \n",
    "    .config(\"spark.sql.catalogImplementation\", \"hive\")\n",
    "    .config(\"spark.sql.repl.eagerEval.enabled\", \"true\")\n",
    "    .config(\"spark.driver.allowMultipleContexts\", \"true\")\n",
    "    .getOrCreate()\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[32mimport \u001b[39m\u001b[36mspark.implicits._\n",
       "\u001b[39m\n",
       "defined \u001b[32mfunction\u001b[39m \u001b[36msc\u001b[39m\n",
       "\u001b[36mhiveCxt\u001b[39m: \u001b[32msql\u001b[39m.\u001b[32mhive\u001b[39m.\u001b[32mHiveContext\u001b[39m = org.apache.spark.sql.hive.HiveContext@795eddf3"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import spark.implicits._\n",
    "def sc = spark.sparkContext\n",
    "val hiveCxt = new org.apache.spark.sql.hive.HiveContext(sc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "defined \u001b[32mclass\u001b[39m \u001b[36mRichDF\u001b[39m"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "// Credit to Aivean\n",
    "implicit class RichDF(val ds:DataFrame) {\n",
    "    def showHTML(limit: Int = 50, truncate: Int = 100) = {\n",
    "        import xml.Utility.escape\n",
    "        val data = ds.take(limit)\n",
    "        val header = ds.schema.fieldNames.toSeq        \n",
    "        val rows: Seq[Seq[String]] = data.map { row =>\n",
    "          row.toSeq.map {cell =>\n",
    "            val str = cell match {\n",
    "              case null => \"null\"\n",
    "              case binary: Array[Byte] => binary.map(\"%02X\".format(_)).mkString(\"[\", \" \", \"]\")\n",
    "              case array: Array[_] => array.mkString(\"[\", \", \", \"]\")\n",
    "              case seq: Seq[_] => seq.mkString(\"[\", \", \", \"]\")\n",
    "              case _ => cell.toString\n",
    "            }\n",
    "            if (truncate > 0 && str.length > truncate) {\n",
    "              // do not show ellipses for strings shorter than 4 characters.\n",
    "              if (truncate < 4) str.substring(0, truncate)\n",
    "              else str.substring(0, truncate - 3) + \"...\"\n",
    "            } else {\n",
    "              str\n",
    "            }\n",
    "          }: Seq[String]\n",
    "        }\n",
    "    publish.html(s\"\"\" <table>\n",
    "                <tr>\n",
    "                 ${header.map(h => s\"<th>${escape(h)}</th>\").mkString}\n",
    "                </tr>\n",
    "                ${rows.map {row =>\n",
    "                  s\"<tr>${row.map{c => s\"<td>${escape(c)}</td>\" }.mkString}</tr>\"\n",
    "                }.mkString}\n",
    "            </table>\n",
    "        \"\"\")\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[36mcust_aw\u001b[39m: \u001b[32mDataFrame\u001b[39m = [customerid: int, namestyle: string ... 11 more fields]\n",
       "\u001b[36mcust_addr\u001b[39m: \u001b[32mDataFrame\u001b[39m = [customerid: int, addressid: int ... 1 more field]\n",
       "\u001b[36maddr\u001b[39m: \u001b[32mDataFrame\u001b[39m = [addressid: int, addressline1: string ... 5 more fields]\n",
       "\u001b[36mproduct\u001b[39m: \u001b[32mDataFrame\u001b[39m = [productid: int, name: string ... 12 more fields]\n",
       "\u001b[36morder_det\u001b[39m: \u001b[32mDataFrame\u001b[39m = [salesorderid: int, salesorderdetailid: int ... 4 more fields]\n",
       "\u001b[36morder_head\u001b[39m: \u001b[32mDataFrame\u001b[39m = [salesorderid: int, revisionnumber: int ... 17 more fields]\n",
       "\u001b[36mprod_model\u001b[39m: \u001b[32mDataFrame\u001b[39m = [productmodelid: int, name: string ... 1 more field]\n",
       "\u001b[36mprod_model_prod\u001b[39m: \u001b[32mDataFrame\u001b[39m = [productmodelid: int, productdescriptionid: int ... 1 more field]\n",
       "\u001b[36mprod_desc\u001b[39m: \u001b[32mDataFrame\u001b[39m = [productdescriptionid: int, description: string]\n",
       "\u001b[36mprod_cat\u001b[39m: \u001b[32mDataFrame\u001b[39m = [productcategoryid: int, parentproductcategoryid: int ... 1 more field]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "val cust_aw = hiveCxt.table(\"sqlzoo.CustomerAW\")\n",
    "val cust_addr = hiveCxt.table(\"sqlzoo.CustomerAddress\")\n",
    "val addr = hiveCxt.table(\"sqlzoo.Address\")\n",
    "val product = hiveCxt.table(\"sqlzoo.Product\")\n",
    "val order_det = hiveCxt.table(\"sqlzoo.SalesOrderDetail\")\n",
    "val order_head = hiveCxt.table(\"sqlzoo.SalesOrderHeader\")\n",
    "val prod_model = hiveCxt.table(\"sqlzoo.ProductModel\")\n",
    "val prod_model_prod = hiveCxt.table(\"sqlzoo.ProductModelProductDescription\")\n",
    "val prod_desc = hiveCxt.table(\"sqlzoo.ProductDescription\")\n",
    "val prod_cat = hiveCxt.table(\"sqlzoo.ProductCategory\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.\n",
    "**List the SalesOrderNumber for the customer \"Good Toys\" \"Bike World\"**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>SalessOrderNumber</th><th>CompanyName</th>\n",
       "                </tr>\n",
       "                <tr><td>null</td><td>Bike World</td></tr><tr><td>SO71774</td><td>Good Toys</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(order_head\n",
    " .join(cust_aw\n",
    "       .filter(cust_aw(\"CompanyName\").isin(List(\"Good Toys\", \"Bike World\"): _*)),\n",
    "       \"CustomerID\", joinType=\"right\")\n",
    " .select(\"SalessOrderNumber\", \"CompanyName\")\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.\n",
    "**List the ProductName and the quantity of what was ordered by 'Futuristic Bikes'**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>Name</th><th>OrderQty</th>\n",
       "                </tr>\n",
       "                <tr><td>ML Mountain Seat/Saddle</td><td>2</td></tr><tr><td>Long-Sleeve Logo Jersey, L</td><td>2</td></tr><tr><td>Classic Vest, S</td><td>3</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(product.join(order_det, \"ProductID\", joinType=\"right\")\n",
    " .join(order_head, \"SalesOrderID\", joinType=\"right\")\n",
    " .join(cust_aw.filter(cust_aw(\"CompanyName\")===\"Futuristic Bikes\"), \n",
    "       \"CustomerID\", joinType=\"right\")\n",
    " .select(\"Name\", \"OrderQty\")\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.\n",
    "**List the name and addresses of companies containing the word 'Bike' (upper or lower case) and companies containing 'cycle' (upper or lower case). Ensure that the 'bike's are listed before the 'cycles's.**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>CompanyName</th><th>AddressLine1</th><th>AddressLine2</th><th>City</th><th>StateProvince</th><th>CountryRegion</th><th>PostalCode</th><th>flag</th>\n",
       "                </tr>\n",
       "                <tr><td>A Bike Store</td><td>2251 Elliot Avenue</td><td>null</td><td>Seattle</td><td>Washington</td><td>United States</td><td>98104</td><td>bike</td></tr><tr><td>A Typical Bike Shop</td><td>One Dancing, Rr</td><td>No. 25 Box 8033</td><td>Round Rock</td><td>Texas</td><td>United States</td><td>78664</td><td>bike</td></tr><tr><td>Advanced Bike Components</td><td>12345 Sterling Avenue</td><td>null</td><td>Irving</td><td>Texas</td><td>United States</td><td>75061</td><td>bike</td></tr><tr><td>Area Bike Accessories</td><td>6900 Sisk Road</td><td>null</td><td>Modesto</td><td>California</td><td>United States</td><td>95354</td><td>bike</td></tr><tr><td>Associated Bikes</td><td>5420 West 22500 South</td><td>null</td><td>Salt Lake City</td><td>Utah</td><td>United States</td><td>84101</td><td>bike</td></tr><tr><td>Authorized Bike Sales and Rental</td><td>490 Ne 4th St</td><td>null</td><td>Renton</td><td>Washington</td><td>United States</td><td>98055</td><td>bike</td></tr><tr><td>Basic Bike Company</td><td>15 East Main</td><td>null</td><td>Port Orchard</td><td>Washington</td><td>United States</td><td>98366</td><td>bike</td></tr><tr><td>Best o' Bikes</td><td>250880 Baur Blvd</td><td>null</td><td>Saint Louis</td><td>Missouri</td><td>United States</td><td>63103</td><td>bike</td></tr><tr><td>Big-Time Bike Store</td><td>9909 W. Ventura Boulevard</td><td>null</td><td>Camarillo</td><td>California</td><td>United States</td><td>93010</td><td>bike</td></tr><tr><td>Bike Dealers Association</td><td>9952 E. Lohman Ave.</td><td>null</td><td>Las Cruces</td><td>New Mexico</td><td>United States</td><td>88001</td><td>bike</td></tr><tr><td>Bike Experts</td><td>Lancaster Mall</td><td>null</td><td>Salem</td><td>Oregon</td><td>United States</td><td>97301</td><td>bike</td></tr><tr><td>Bike Universe</td><td>Nut Tree Factory</td><td>null</td><td>Vacaville</td><td>California</td><td>United States</td><td>95688</td><td>bike</td></tr><tr><td>Bike World</td><td>60025 Bollinger Canyon Road</td><td>null</td><td>San Ramon</td><td>California</td><td>United States</td><td>94583</td><td>bike</td></tr><tr><td>Bikes and Motorbikes</td><td>22580 Free Street</td><td>null</td><td>Toronto</td><td>Ontario</td><td>Canada</td><td>M4B 1V7</td><td>bike</td></tr><tr><td>Bikes for Kids and Adults</td><td>9900 Ronson Drive</td><td>null</td><td>Etobicoke</td><td>Ontario</td><td>Canada</td><td>M9W 3P3</td><td>bike</td></tr><tr><td>Bikes for Two</td><td>63 West Beaver Creek</td><td>null</td><td>Richmond Hill</td><td>Ontario</td><td>Canada</td><td>L4E 3M5</td><td>bike</td></tr><tr><td>Bold Bike Accessories</td><td>Rocky Mountain Pines Outlet</td><td>null</td><td>Loveland</td><td>Colorado</td><td>United States</td><td>80537</td><td>bike</td></tr><tr><td>Cash &amp; Carry Bikes</td><td>Lakeline Mall</td><td>null</td><td>Cedar Park</td><td>Texas</td><td>United States</td><td>78613</td><td>bike</td></tr><tr><td>Coalition Bike Company</td><td>Corporate Office</td><td>null</td><td>El Segundo</td><td>California</td><td>United States</td><td>90245</td><td>bike</td></tr><tr><td>Commendable Bikes</td><td>4781 Highway 95</td><td>null</td><td>Sandpoint</td><td>Idaho</td><td>United States</td><td>83864</td><td>bike</td></tr><tr><td>Economy Bikes Company</td><td>72502 Eastern Ave.</td><td>null</td><td>Bell Gardens</td><td>California</td><td>United States</td><td>90201</td><td>bike</td></tr><tr><td>Eighth Bike Store</td><td>2500 N Serene Blvd</td><td>19th Floor</td><td>El Segundo</td><td>California</td><td>United States</td><td>90245</td><td>bike</td></tr><tr><td>Elite Bikes</td><td>9178 Jumping St.</td><td>null</td><td>Dallas</td><td>Texas</td><td>United States</td><td>75201</td><td>bike</td></tr><tr><td>Elite Bikes</td><td>Po Box 8259024</td><td>null</td><td>Dallas</td><td>Texas</td><td>United States</td><td>75201</td><td>bike</td></tr><tr><td>Engineered Bike Systems</td><td>123 Camelia Avenue</td><td>null</td><td>Oxnard</td><td>California</td><td>United States</td><td>93030</td><td>bike</td></tr><tr><td>Essential Bike Works</td><td>New Millhouse, 2583 Milton Park</td><td>null</td><td>Abingdon</td><td>England</td><td>United Kingdom</td><td>OX14 4SE</td><td>bike</td></tr><tr><td>Express Bike Services</td><td>586 Fulham Road,</td><td>null</td><td>London</td><td>England</td><td>United Kingdom</td><td>SW6 SBY</td><td>bike</td></tr><tr><td>Extended Bike Sales</td><td>The Citadel Commerce Plaza</td><td>null</td><td>City Of Commerce</td><td>California</td><td>United States</td><td>90040</td><td>bike</td></tr><tr><td>Extraordinary Bike Works</td><td>Town East Center</td><td>null</td><td>Mesquite</td><td>Texas</td><td>United States</td><td>75149</td><td>bike</td></tr><tr><td>Family's Favorite Bike Shop</td><td>26910 Indela Road</td><td>null</td><td>Montreal</td><td>Quebec</td><td>Canada</td><td>H1Y 2H5</td><td>bike</td></tr><tr><td>Family's Favorite Bike Shop</td><td>25981 College Street</td><td>null</td><td>Montreal</td><td>Quebec</td><td>Canada</td><td>H1Y 2H5</td><td>bike</td></tr><tr><td>Famous Bike Shop</td><td>999 West Georgia St.</td><td>null</td><td>Vancouver</td><td>Ontario</td><td>Canada</td><td>V5T 1Y9</td><td>bike</td></tr><tr><td>Farthermost Bike Shop</td><td>99000 S. Avalon Blvd. Suite 750</td><td>null</td><td>Carson</td><td>California</td><td>United States</td><td>90746</td><td>bike</td></tr><tr><td>Farthest Bike Store</td><td>45259 Canada Way</td><td>null</td><td>Burnaby</td><td>British Columbia</td><td>Canada</td><td>V5G 4S4</td><td>bike</td></tr><tr><td>Fashionable Bikes and Accessories</td><td>Sports Store At Park City</td><td>null</td><td>Park City</td><td>Utah</td><td>United States</td><td>84098</td><td>bike</td></tr><tr><td>Fifth Bike Store</td><td>2502 Evergreen Ste E</td><td>null</td><td>Everett</td><td>Washington</td><td>United States</td><td>98201</td><td>bike</td></tr><tr><td>First Bike Store</td><td>Kansas City Factory Outlet</td><td>null</td><td>Odessa</td><td>Missouri</td><td>United States</td><td>64076</td><td>bike</td></tr><tr><td>Flawless Bike Shop</td><td>North County Square</td><td>null</td><td>Vista</td><td>California</td><td>United States</td><td>92084</td><td>bike</td></tr><tr><td>Fleet Bikes</td><td>54254 Pacific Ave.</td><td>null</td><td>Stockton</td><td>California</td><td>United States</td><td>95202</td><td>bike</td></tr><tr><td>Friendly Bike Shop</td><td>750 Lakeway Dr</td><td>null</td><td>Bellingham</td><td>Washington</td><td>United States</td><td>98225</td><td>bike</td></tr><tr><td>Friendly Bike Shop</td><td>Port Huron</td><td>null</td><td>Port Huron</td><td>Michigan</td><td>United States</td><td>48060</td><td>bike</td></tr><tr><td>Friendly Neighborhood Bikes</td><td>Johnson Creek</td><td>null</td><td>Johnson Creek</td><td>Wisconsin</td><td>United States</td><td>53038</td><td>bike</td></tr><tr><td>Frugal Bike Shop</td><td>2575 West 2700 South</td><td>null</td><td>Salt Lake City</td><td>Utah</td><td>United States</td><td>84101</td><td>bike</td></tr><tr><td>Full-Service Bike Store</td><td>9920 North Telegraph Rd.</td><td>null</td><td>Pontiac</td><td>Michigan</td><td>United States</td><td>48342</td><td>bike</td></tr><tr><td>Fun Toys and Bikes</td><td>6500 East Grant Road</td><td>null</td><td>Tucson</td><td>Arizona</td><td>United States</td><td>85701</td><td>bike</td></tr><tr><td>Future Bikes</td><td>67255 - 8th Street N.E., Suite 350</td><td>null</td><td>Calgary</td><td>Alberta</td><td>Canada</td><td>T2P 2G8</td><td>bike</td></tr><tr><td>Futuristic Bikes</td><td>25136 Jefferson Blvd.</td><td>null</td><td>Culver City</td><td>California</td><td>United States</td><td>90232</td><td>bike</td></tr><tr><td>Gear-Shift Bikes Limited</td><td>2512-4th Ave Sw</td><td>null</td><td>Calgary</td><td>Alberta</td><td>Canada</td><td>T2P 2G8</td><td>bike</td></tr><tr><td>General Bike Corporation</td><td>69251 Creditview Road</td><td>null</td><td>Mississauga</td><td>Ontario</td><td>Canada</td><td>L5B 3V4</td><td>bike</td></tr><tr><td>Genial Bike Associates</td><td>99450 Highway 59 North</td><td>null</td><td>Humble</td><td>Texas</td><td>United States</td><td>77338</td><td>bike</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(cust_aw.join(cust_addr, \"CustomerID\")\n",
    " .join(addr, \"AddressID\")\n",
    " .select(\"CompanyName\", \"AddressLine1\", \"AddressLine2\", \"City\",\n",
    "         \"StateProvince\", \"CountryRegion\", \"PostalCode\")\n",
    " .withColumn(\"flag\", when(lower(col(\"CompanyName\")).like(\"%bike%\"), \"bike\")\n",
    "             .when(lower(col(\"CompanyName\")).like(\"%cycle%\"), \"cycle\"))\n",
    " .na.drop(Seq(\"flag\"))\n",
    " .orderBy(\"flag\", \"CompanyName\")\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4.\n",
    "**Show the total order value for each CountryRegion. List by value with the highest first.**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>CountryRegion</th><th>sum(SubTotal)</th>\n",
       "                </tr>\n",
       "                <tr><td>United Kingdom</td><td>518096.42</td></tr><tr><td>United States</td><td>347336.69</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(addr.join(order_head, (addr(\"AddressID\")===order_head(\"ShipToAddressID\")))\n",
    " .groupBy(\"CountryRegion\")\n",
    " .sum(\"SubTotal\")\n",
    " .withColumn(\"sum(SubTotal)\", round($\"sum(SubTotal)\", 2))\n",
    " .orderBy(col(\"sum(SubTotal)\").desc)\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5.\n",
    "**Find the best customer in each region.**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>CountryRegion</th><th>CompanyName</th><th>sum(SubTotal)</th>\n",
       "                </tr>\n",
       "                <tr><td>United Kingdom</td><td>Action Bicycle Specialists</td><td>108561.83</td></tr><tr><td>United States</td><td>Eastside Department Store</td><td>83858.43</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(addr.join(order_head, (addr(\"AddressID\")===order_head(\"ShipToAddressID\")))\n",
    " .join(cust_aw, \"CustomerID\")\n",
    " .groupBy(\"CountryRegion\", \"CompanyName\")\n",
    " .sum(\"SubTotal\")\n",
    " .withColumn(\"sum(SubTotal)\", round($\"sum(SubTotal)\", 2))\n",
    " .withColumn(\"sn\", rank().over(\n",
    "     Window.partitionBy(\"CountryRegion\").orderBy(col(\"sum(SubTotal)\").desc)))\n",
    " .filter(col(\"sn\")===1)\n",
    " .select(\"CountryRegion\", \"CompanyName\", \"sum(SubTotal)\")\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "spark.stop()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Scala",
   "language": "scala",
   "name": "scala"
  },
  "language_info": {
   "codemirror_mode": "text/x-scala",
   "file_extension": ".sc",
   "mimetype": "text/x-scala",
   "name": "scala",
   "nbconvert_exporter": "script",
   "version": "2.12.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
